# -*- coding=utf-8 -*-
# 2021.
# @zlk 
# zhanglk960127@163.com
# Find the sizes of lost (missing) gene segments

import sys
import os
import re
# Command-line arguments:
#   argv[1]  pangenome table (tab-separated), read later by the script
#   argv[2]  output file
#   argv[3]  folder that contains the 'mSynF*' sub-folders
#   argv[-1] integer column index into the pangenome table (always the LAST argument)
file1 = open(sys.argv[1], 'r')  # pangenome file

file2 = open(sys.argv[2], 'w')
msyn_fold = sys.argv[3]
index = int(sys.argv[-1])
num = 0

# Build ortho_dict from every '*ortho' file whose name contains the number
# (index - 3). The position of that number among the numbers in the file
# name selects the value column; for positions 0 and 1 the key is taken from
# the other one of the first two columns.
# NOTE(review): presumably (index - 3) is the genome/sample number encoded in
# the ortho file names -- confirm against the mSyn output naming scheme.
ortho_dict = {}
wanted_number = str(index - 3)
for entry in os.listdir(msyn_fold):
    sub_dir = os.path.join(msyn_fold, entry)
    # Only descend into real directories; a stray regular file named
    # 'mSynF...' would otherwise make os.listdir() raise.
    if not entry.startswith('mSynF') or not os.path.isdir(sub_dir):
        continue
    for ortho_file in os.listdir(sub_dir):
        if not ortho_file.endswith('ortho'):
            continue
        name_numbers = re.findall(r'\d+', ortho_file)
        if wanted_number not in name_numbers:
            continue
        col_num = name_numbers.index(wanted_number)
        # The context manager closes each ortho file as soon as it is parsed.
        with open(os.path.join(sub_dir, ortho_file), 'r') as op_file:
            for line in op_file:
                line_list = line.strip().split('\t')
                if line_list == ['']:
                    # Blank line: nothing to map.
                    continue
                ortho_dict[line_list[abs(1 - col_num)]] = line_list[col_num]
        
# Scan the pangenome table and collect runs of consecutive rows whose target
# column (line_list[index]) is '-'.
#   total_list: one entry per run; each entry is a list of row slices
#               (line_list[3:]) for the rows in that run
#   num_list:   length of each run, parallel to total_list
# Rows that are skipped (fewer than two non-empty fields from column 4 on, or
# column 3 equal to 'SL4.0ch00') neither extend nor terminate the current run.
total_list = []
gene_list = []
num_list = []
for line in file1:
    # Strip only the line terminator; slicing off the last character would
    # eat real data when the final line has no trailing newline.
    line_list = line.rstrip('\n').split('\t')
    filled = sum(1 for field in line_list[4:] if field != '')
    if filled < 2:
        continue
    if line_list[3] == 'SL4.0ch00':
        continue
    if line_list[index] == '-':
        gene_list.append(line_list[3:])
        continue
    # A row with a value in the target column closes the current run, if any.
    if gene_list:
        total_list.append(gene_list)
        num_list.append(len(gene_list))
    gene_list = []
# Flush a run that extends to the end of the file; without this the trailing
# run would be silently dropped.
if gene_list:
    total_list.append(gene_list)
    num_list.append(len(gene_list))
    gene_list = []
file1.close()
# print(sum(num_list)/len(num_list))
# print(sum(num_list))
# print(len(num_list))
# print(max(num_list))
# Write one output line per row of every run and accumulate, per run-size
# bucket, the total number of rows that belong to runs of that size.
# Output line layout (tab-separated, with a trailing tab before the newline):
#   run number, run size, orthologue label, then every field of the row
#   that is not '-'.
# The orthologue label is the single value that the row's fields map to in
# ortho_dict; it is 'no' when no field maps or when they map to several
# different values.
# Buckets: run size 1, 2, 3, 4, 5 and 6-or-more (in that order).
bucket_totals = [0] * 6
try:
    for line_index, i_list in enumerate(total_list, start=1):
        run_size = len(i_list)
        for row in i_list:
            present = [x for x in row if x != '-']
            mapped = {ortho_dict[x] for x in present if x in ortho_dict}
            label = mapped.pop() if len(mapped) == 1 else 'no'
            fields = [str(line_index), str(run_size), label] + present
            # Every field, including the last one, is followed by a tab.
            file2.write('\t'.join(fields) + '\t\n')
        # Sizes of 6 and above share the last bucket.
        bucket_totals[min(run_size, 6) - 1] += run_size
finally:
    # Close explicitly so the output is flushed even if writing fails midway.
    file2.close()

# Summary line: target column index followed by the six bucket totals.
summary = [index]
for total in bucket_totals:
    summary += ['\t', total]
print(*summary)